{
case PGT_l1_page_table:
free_l1_table(page);
-#ifdef CONFIG_SHADOW
- // assume we're in shadow mode if PSH_shadowed set
- if ( current->mm.shadowmode && page->shadow_and_flags & PSH_shadowed )
+ if ( unlikely(current->mm.shadow_mode) &&
+ (get_shadow_status(current, page-frame_table) & PSH_shadowed) )
+ {
unshadow_table( page-frame_table, type );
-#endif
+ put_shadow_status(current);
+ }
return;
case PGT_l2_page_table:
free_l2_table(page);
-#ifdef CONFIG_SHADOW
- // assume we're in shadow mode if PSH_shadowed set
- if ( current->mm.shadowmode && page->shadow_and_flags & PSH_shadowed )
+ if ( unlikely(current->mm.shadow_mode) &&
+ (get_shadow_status(current, page-frame_table) & PSH_shadowed) )
+ {
unshadow_table( page-frame_table, type );
-#endif
+ put_shadow_status(current);
+ }
return;
default:
put_page_and_type(&frame_table[pagetable_val(current->mm.pagetable)
>> PAGE_SHIFT]);
current->mm.pagetable = mk_pagetable(pfn << PAGE_SHIFT);
-#ifdef CONFIG_SHADOW
- current->mm.shadowtable =
- shadow_mk_pagetable(pfn << PAGE_SHIFT, current->mm.shadowmode);
-#endif
- invalidate_shadow_ldt();
+ if( unlikely(current->mm.shadow_mode))
+ current->mm.shadow_table =
+ shadow_mk_pagetable(current, pfn<<PAGE_SHIFT);
+
+ invalidate_shadow_ldt();
+
+ // start using the new PT straight away
percpu_info[cpu].deferred_ops &= ~DOP_FLUSH_TLB;
-#ifdef CONFIG_SHADOW
- if ( unlikely(current->mm.shadowmode) )
+ if ( unlikely(current->mm.shadow_mode) )
{
- check_pagetable( current->mm.pagetable, "pre-stlb-flush" );
- write_cr3_counted(pagetable_val(current->mm.shadowtable));
+ check_pagetable( current,
+ current->mm.pagetable, "pre-stlb-flush" );
+ write_cr3_counted(pagetable_val(current->mm.shadow_table));
}
else
-#endif
write_cr3_counted(pagetable_val(current->mm.pagetable));
}
else
struct pfn_info *page;
int rc = 0, okay = 1, i, cpu = smp_processor_id();
unsigned int cmd;
-#ifdef CONFIG_SHADOW
unsigned long prev_spfn = 0;
l1_pgentry_t *prev_spl1e = 0;
-#endif
perfc_incrc(calls_to_mmu_update);
perfc_addc(num_page_updates, count);
okay = mod_l1_entry((l1_pgentry_t *)va,
mk_l1_pgentry(req.val));
-#ifdef CONFIG_SHADOW
- if ( okay && page->shadow_and_flags & PSH_shadowed )
+ if ( okay && unlikely(current->mm.shadow_mode) &&
+ (get_shadow_status(current, page-frame_table) &
+ PSH_shadowed) )
+ {
shadow_l1_normal_pt_update( req.ptr, req.val,
&prev_spfn, &prev_spl1e );
-#endif
+ put_shadow_status(current);
+ }
put_page_type(page);
}
okay = mod_l2_entry((l2_pgentry_t *)va,
mk_l2_pgentry(req.val),
pfn);
-#ifdef CONFIG_SHADOW
- if ( okay && page->shadow_and_flags & PSH_shadowed )
+
+ if ( okay && unlikely(current->mm.shadow_mode) &&
+ (get_shadow_status(current, page-frame_table) &
+ PSH_shadowed) )
+ {
shadow_l2_normal_pt_update( req.ptr, req.val );
-#endif
+ put_shadow_status(current);
+ }
put_page_type(page);
}
okay = 1;
put_page_type(page);
-#ifdef CONFIG_SHADOW
- if ( page->shadow_and_flags & PSH_shadowed )
- BUG();
- // at present, we shouldn't be shadowing such pages
-#endif
-
-
+                // at present, we don't shadow such pages
}
break;
}
-check_pagetable( current->mm.pagetable, "mmu" ); // XXX XXX XXX XXX XXX
-
put_page(page);
break;
if ( prev_pfn != 0 )
unmap_domain_mem((void *)va);
-#ifdef CONFIG_SHADOW
if( prev_spl1e != 0 )
unmap_domain_mem((void *)prev_spl1e);
-#endif
deferred_ops = percpu_info[cpu].deferred_ops;
percpu_info[cpu].deferred_ops = 0;
if ( deferred_ops & DOP_FLUSH_TLB )
{
-#ifdef CONFIG_SHADOW
- if ( unlikely(current->mm.shadowmode) )
+ if ( unlikely(current->mm.shadow_mode) )
{
- check_pagetable( current->mm.pagetable, "pre-stlb-flush" );
- write_cr3_counted(pagetable_val(current->mm.shadowtable));
+ check_pagetable( current,
+ current->mm.pagetable, "pre-stlb-flush" );
+ write_cr3_counted(pagetable_val(current->mm.shadow_table));
}
else
-#endif
- write_cr3_counted(pagetable_val(current->mm.pagetable));
+ write_cr3_counted(pagetable_val(current->mm.pagetable));
}
if ( deferred_ops & DOP_RELOAD_LDT )
mk_l1_pgentry(val))) )
err = -EINVAL;
-#ifdef CONFIG_SHADOW
-
- if ( unlikely(p->mm.shadowmode) )
+ if ( unlikely(p->mm.shadow_mode) )
{
unsigned long sval = 0;
{
            // Since L2's are guaranteed RW, failure indicates the page
// was not shadowed, so ignore.
-
+ perfc_incrc(shadow_update_va_fail);
//MEM_LOG("update_va_map: couldn't write update\n");
}
- }
-check_pagetable( p->mm.pagetable, "va" );
+ check_pagetable( p, p->mm.pagetable, "va" ); // debug
+
+ }
-#endif
deferred_ops = percpu_info[cpu].deferred_ops;
percpu_info[cpu].deferred_ops = 0;
if ( unlikely(deferred_ops & DOP_FLUSH_TLB) ||
unlikely(flags & UVMF_FLUSH_TLB) )
{
-#ifdef CONFIG_SHADOW
- if ( unlikely(p->mm.shadowmode) )
- write_cr3_counted(pagetable_val(p->mm.shadowtable));
+ if ( unlikely(p->mm.shadow_mode) )
+ write_cr3_counted(pagetable_val(p->mm.shadow_table));
else
-#endif
- write_cr3_counted(pagetable_val(p->mm.pagetable));
+ write_cr3_counted(pagetable_val(p->mm.pagetable));
}
else if ( unlikely(flags & UVMF_INVLPG) )
__flush_tlb_one(page_nr << PAGE_SHIFT);
#include <asm/domain_page.h>
#include <asm/page.h>
-#ifdef CONFIG_SHADOW
-
-
-#if SHADOW_DEBUG
-#define MEM_VLOG(_f, _a...) \
- printk("DOM%llu: (file=shadow.c, line=%d) " _f "\n", \
- current->domain , __LINE__ , ## _a )
-#else
-#define MEM_VLOG(_f, _a...)
-#endif
-
-#if 0
-#define MEM_VVLOG(_f, _a...) \
- printk("DOM%llu: (file=shadow.c, line=%d) " _f "\n", \
- current->domain , __LINE__ , ## _a )
-#else
-#define MEM_VVLOG(_f, _a...)
-#endif
-
/********
To use these shadow page tables, guests must not rely on the ACCESSED
and DIRTY bits on L2 pte's being accurate -- they will typically all be set.
+
I doubt this will break anything. (If guests want to use the va_update
mechanism they've signed up for this anyhow...)
********/
-pagetable_t shadow_mk_pagetable( unsigned long gptbase,
- unsigned int shadowmode )
+int shadow_mode_enable( struct task_struct *p, unsigned int mode )
{
- unsigned long gpfn, spfn=0;
+ struct shadow_status **fptr;
+ int i;
+
+ // sychronously stop domain
+ // XXX for the moment, only use on already stopped domains!!!
+
+ spin_lock_init(&p->mm.shadow_lock);
+ spin_lock(&p->mm.shadow_lock);
+
+ p->mm.shadow_mode = mode;
+
+ // allocate hashtable
+ p->mm.shadow_ht = kmalloc( shadow_ht_buckets *
+ sizeof(struct shadow_status), GFP_KERNEL );
+ if( ! p->mm.shadow_ht )
+ goto nomem;
- MEM_VVLOG("shadow_mk_pagetable( gptbase=%08lx, mode=%d )",
- gptbase, shadowmode );
+ memset( p->mm.shadow_ht, 0, shadow_ht_buckets *
+ sizeof(struct shadow_status) );
- if ( unlikely(shadowmode) )
+
+ // allocate space for first lot of extra nodes
+ p->mm.shadow_ht_extras = kmalloc( sizeof(void*) + (shadow_ht_extra_size *
+ sizeof(struct shadow_status)), GFP_KERNEL );
+
+ if( ! p->mm.shadow_ht_extras )
+ goto nomem;
+
+ memset( p->mm.shadow_ht_extras, 0, sizeof(void*) + (shadow_ht_extra_size *
+ sizeof(struct shadow_status)) );
+
+ // add extras to free list
+ fptr = &p->mm.shadow_ht_free;
+ for ( i=0; i<shadow_ht_extra_size; i++ )
{
- gpfn = gptbase >> PAGE_SHIFT;
-
- if ( likely(frame_table[gpfn].shadow_and_flags & PSH_shadowed) )
- {
- spfn = frame_table[gpfn].shadow_and_flags & PSH_pfn_mask;
+ *fptr = &p->mm.shadow_ht_extras[i];
+ fptr = &(p->mm.shadow_ht_extras[i].next);
+ }
+ *fptr = NULL;
+ *((struct shadow_status ** ) &p->mm.shadow_ht_extras[shadow_ht_extra_size]) = NULL;
+
+ spin_unlock(&p->mm.shadow_lock);
+
+ // call shadow_mk_pagetable
+ p->mm.shadow_table = shadow_mk_pagetable( p,
+ pagetable_val(p->mm.pagetable) );
+
+ return 0;
+
+nomem:
+ spin_unlock(&p->mm.shadow_lock);
+ return -ENOMEM;
+}
+
+void shadow_mode_disable( )
+{
+
+ // free the hash buckets as you go
+
+ // free the hashtable itself
+}
+
+
+static inline void free_shadow_page( struct task_struct *p, unsigned int pfn )
+{
+ unsigned long flags;
+
+ p->mm.shadow_page_count--;
+
+ spin_lock_irqsave(&free_list_lock, flags);
+ list_add(&frame_table[pfn].list, &free_list);
+ free_pfns++;
+ spin_unlock_irqrestore(&free_list_lock, flags);
+}
+
+static inline struct pfn_info *alloc_shadow_page( struct task_struct *p )
+{
+ p->mm.shadow_page_count++;
+
+ return alloc_domain_page( NULL );
+}
+
+
+static void __free_shadow_table( struct task_struct *p )
+{
+ int j;
+ struct shadow_status *a;
+
+ // the code assumes you're not using the page tables i.e.
+ // the domain is stopped and cr3 is something else!!
+
+ // walk the hash table and call free_shadow_page on all pages
+
+ for(j=0;j<shadow_ht_buckets;j++)
+ {
+ a = &p->mm.shadow_ht[j];
+ if (a->pfn)
+ {
+ free_shadow_page( p, a->spfn_and_flags & PSH_pfn_mask );
+ a->pfn = 0;
+ a->spfn_and_flags = 0;
+ }
+ a=a->next;
+ while(a)
+ {
+ struct shadow_status *next = a->next;
+ free_shadow_page( p, a->spfn_and_flags & PSH_pfn_mask );
+ a->pfn = 0;
+ a->spfn_and_flags = 0;
+ a->next = p->mm.shadow_ht_free;
+ p->mm.shadow_ht_free = a;
+ a=next;
}
- else
- {
- spfn = shadow_l2_table( gpfn );
- }
}
+}
+
+static void flush_shadow_table( struct task_struct *p )
+{
+
+ // XXX synchronously stop domain (needed for SMP guests)
+
+ // switch to idle task's page tables
+
+ // walk the hash table and call free_shadow_page on all pages
+ spin_lock(&p->mm.shadow_lock);
+ __free_shadow_table( p );
+ spin_unlock(&p->mm.shadow_lock);
- return mk_pagetable(spfn << PAGE_SHIFT);
+ // XXX unpause domain
}
+
+
void unshadow_table( unsigned long gpfn, unsigned int type )
{
unsigned long spfn;
- MEM_VLOG("unshadow_table type=%08x gpfn=%08lx, spfn=%08lx",
+ SH_VLOG("unshadow_table type=%08x gpfn=%08lx",
type,
- gpfn,
- frame_table[gpfn].shadow_and_flags & PSH_pfn_mask );
+ gpfn );
perfc_incrc(unshadow_table_count);
// even in the SMP guest case, there won't be a race here as
// this CPU was the one that cmpxchg'ed the page to invalid
- spfn = frame_table[gpfn].shadow_and_flags & PSH_pfn_mask;
- frame_table[gpfn].shadow_and_flags=0;
- frame_table[spfn].shadow_and_flags=0;
+ spfn = __shadow_status(current, gpfn) & PSH_pfn_mask;
+ delete_shadow_status(current, gpfn);
#if 0 // XXX leave as might be useful for later debugging
{
else
perfc_decr(shadow_l2_pages);
- //free_domain_page( &frame_table[spfn] );
-
- {
- unsigned long flags;
- spin_lock_irqsave(&free_list_lock, flags);
- list_add(&frame_table[spfn].list, &free_list);
- free_pfns++;
- spin_unlock_irqrestore(&free_list_lock, flags);
- }
+ free_shadow_page( current, spfn );
}
-unsigned long shadow_l2_table( unsigned long gpfn )
+static unsigned long shadow_l2_table(
+ struct task_struct *p, unsigned long gpfn )
{
struct pfn_info *spfn_info;
unsigned long spfn;
l2_pgentry_t *spl2e, *gpl2e;
int i;
- MEM_VVLOG("shadow_l2_table( %08lx )",gpfn);
+ SH_VVLOG("shadow_l2_table( %08lx )",gpfn);
+ spin_lock(&p->mm.shadow_lock);
perfc_incrc(shadow_l2_table_count);
perfc_incr(shadow_l2_pages);
// XXX in future, worry about racing in SMP guests
// -- use cmpxchg with PSH_pending flag to show progress (and spin)
- spfn_info = alloc_domain_page( NULL ); // XXX account properly later
+ spfn_info = alloc_shadow_page(p);
ASSERT( spfn_info ); // XXX deal with failure later e.g. blow cache
spfn = (unsigned long) (spfn_info - frame_table);
// mark pfn as being shadowed, update field to point at shadow
- frame_table[gpfn].shadow_and_flags = spfn | PSH_shadowed;
-
- // mark shadow pfn as being a shadow, update field to point at pfn
- frame_table[spfn].shadow_and_flags = gpfn | PSH_shadow;
+ set_shadow_status(p, gpfn, spfn | PSH_shadowed);
// we need to do this before the linear map is set up
spl2e = (l2_pgentry_t *) map_domain_mem(spfn << PAGE_SHIFT);
if (gpte & _PAGE_PRESENT)
{
unsigned long s_sh =
- frame_table[ gpte>>PAGE_SHIFT ].shadow_and_flags;
+ __shadow_status(p, gpte>>PAGE_SHIFT);
if( s_sh & PSH_shadowed ) // PSH_shadowed
{
- if ( unlikely( (frame_table[gpte>>PAGE_SHIFT].type_and_flags & PGT_type_mask) == PGT_l2_page_table) )
+ if ( unlikely( (__shadow_status(p, gpte>>PAGE_SHIFT) & PGT_type_mask) == PGT_l2_page_table) )
{
printk("Linear mapping detected\n");
spte = gpte & ~_PAGE_RW;
unmap_domain_mem( gpl2e );
unmap_domain_mem( spl2e );
- MEM_VLOG("shadow_l2_table( %08lx -> %08lx)",gpfn,spfn);
-
+ SH_VLOG("shadow_l2_table( %08lx -> %08lx)",gpfn,spfn);
+ spin_unlock(&p->mm.shadow_lock);
return spfn;
}
+pagetable_t shadow_mk_pagetable( struct task_struct *p,
+ unsigned long gptbase)
+{
+ unsigned long gpfn, spfn=0;
+
+ SH_VVLOG("shadow_mk_pagetable( gptbase=%08lx, mode=%d )",
+ gptbase, p->mm.shadow_mode );
+
+ if ( likely(p->mm.shadow_mode) ) // should always be true if we're here
+ {
+ gpfn = gptbase >> PAGE_SHIFT;
+
+ if ( unlikely((spfn=__shadow_status(p, gpfn)) == 0 ) )
+ {
+ spfn = shadow_l2_table(p, gpfn );
+ }
+ }
+
+ SH_VVLOG("leaving shadow_mk_pagetable( gptbase=%08lx, mode=%d )",
+ gptbase, p->mm.shadow_mode );
+
+ return mk_pagetable(spfn<<PAGE_SHIFT);
+}
int shadow_fault( unsigned long va, long error_code )
{
unsigned long gpte, spte;
- MEM_VVLOG("shadow_fault( va=%08lx, code=%ld )", va, error_code );
+ SH_VVLOG("shadow_fault( va=%08lx, code=%ld )", va, error_code );
- check_pagetable( current->mm.pagetable, "pre-sf" );
+ spin_lock(¤t->mm.shadow_lock);
+
+ check_pagetable( current, current->mm.pagetable, "pre-sf" );
if ( unlikely(__get_user(gpte, (unsigned long*)&linear_pg_table[va>>PAGE_SHIFT])) )
{
- MEM_VVLOG("shadow_fault - EXIT: read gpte faulted" );
+ SH_VVLOG("shadow_fault - EXIT: read gpte faulted" );
+ spin_unlock(¤t->mm.shadow_lock);
return 0; // propagate to guest
}
if ( ! (gpte & _PAGE_PRESENT) )
{
- MEM_VVLOG("shadow_fault - EXIT: gpte not present (%lx)",gpte );
+ SH_VVLOG("shadow_fault - EXIT: gpte not present (%lx)",gpte );
+ spin_unlock(¤t->mm.shadow_lock);
return 0; // we're not going to be able to help
}
+
spte = gpte;
if ( error_code & 2 )
}
else
{ // write fault on RO page
- MEM_VVLOG("shadow_fault - EXIT: write fault on RO page (%lx)",gpte );
+ SH_VVLOG("shadow_fault - EXIT: write fault on RO page (%lx)",gpte );
+ spin_unlock(¤t->mm.shadow_lock);
return 0; // propagate to guest
// not clear whether we should set accessed bit here...
}
spte &= ~_PAGE_RW; // force clear unless already dirty
}
- MEM_VVLOG("plan: gpte=%08lx spte=%08lx", gpte, spte );
+ SH_VVLOG("plan: gpte=%08lx spte=%08lx", gpte, spte );
// write back updated gpte
// XXX watch out for read-only L2 entries! (not used in Linux)
unsigned long gpde, spde, gl1pfn, sl1pfn;
- MEM_VVLOG("3: not shadowed or l2 insufficient gpte=%08lx spte=%08lx",gpte,spte );
+ SH_VVLOG("3: not shadowed or l2 insufficient gpte=%08lx spte=%08lx",gpte,spte );
gpde = l2_pgentry_val(linear_l2_table[va>>L2_PAGETABLE_SHIFT]);
gl1pfn = gpde>>PAGE_SHIFT;
- if ( ! (frame_table[gl1pfn].shadow_and_flags & PSH_shadowed ) )
+ if ( ! (sl1pfn=__shadow_status(current, gl1pfn) ) )
{
// this L1 is NOT already shadowed so we need to shadow it
struct pfn_info *sl1pfn_info;
sl1pfn_info = alloc_domain_page( NULL ); // XXX account properly!
sl1pfn = sl1pfn_info - frame_table;
- MEM_VVLOG("4a: l1 not shadowed ( %08lx )",sl1pfn);
+ SH_VVLOG("4a: l1 not shadowed ( %08lx )",sl1pfn);
perfc_incrc(shadow_l1_table_count);
perfc_incr(shadow_l1_pages);
- sl1pfn_info->shadow_and_flags = PSH_shadow | gl1pfn;
- frame_table[gl1pfn].shadow_and_flags = PSH_shadowed | sl1pfn;
+ set_shadow_status(current, gl1pfn, PSH_shadowed | sl1pfn);
gpde = gpde | _PAGE_ACCESSED | _PAGE_DIRTY;
spde = (gpde & ~PAGE_MASK) | _PAGE_RW | (sl1pfn<<PAGE_SHIFT);
// this L1 was shadowed (by another PT) but we didn't have an L2
// entry for it
- sl1pfn = frame_table[gl1pfn].shadow_and_flags & PSH_pfn_mask;
-
- MEM_VVLOG("4b: was shadowed, l2 missing ( %08lx )",sl1pfn);
+ SH_VVLOG("4b: was shadowed, l2 missing ( %08lx )",sl1pfn);
spde = (gpde & ~PAGE_MASK) | (sl1pfn<<PAGE_SHIFT) | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY;
if ( unlikely( (sl1pfn<<PAGE_SHIFT) == (gl1pfn<<PAGE_SHIFT) ) )
{ // detect linear map, and keep pointing at guest
- MEM_VLOG("4c: linear mapping ( %08lx )",sl1pfn);
+ SH_VLOG("4c: linear mapping ( %08lx )",sl1pfn);
spde = (spde & ~PAGE_MASK) | (gl1pfn<<PAGE_SHIFT);
}
perfc_incrc(shadow_fixup_count);
- check_pagetable( current->mm.pagetable, "post-sf" );
+ check_pagetable( current, current->mm.pagetable, "post-sf" );
+
+ spin_unlock(¤t->mm.shadow_lock);
return 1; // let's try the faulting instruction again...
l1_pgentry_t * spl1e, * prev_spl1e = *prev_spl1e_ptr;
-MEM_VVLOG("shadow_l1_normal_pt_update pa=%08lx, gpte=%08lx, prev_spfn=%08lx, prev_spl1e=%08lx\n",
+SH_VVLOG("shadow_l1_normal_pt_update pa=%08lx, gpte=%08lx, prev_spfn=%08lx, prev_spl1e=%08lx\n",
pa,gpte,prev_spfn, prev_spl1e);
// to get here, we know the l1 page *must* be shadowed
gpfn = pa >> PAGE_SHIFT;
- spfn = frame_table[gpfn].shadow_and_flags & PSH_pfn_mask;
+ spfn = __shadow_status(current, gpfn) & PSH_pfn_mask;
if ( spfn == prev_spfn )
{
{
unsigned long gpfn, spfn, spte;
l2_pgentry_t * sp2le;
- unsigned long s_sh;
+ unsigned long s_sh=0;
- MEM_VVLOG("shadow_l2_normal_pt_update pa=%08lx, gpte=%08lx",pa,gpte);
+ SH_VVLOG("shadow_l2_normal_pt_update pa=%08lx, gpte=%08lx",pa,gpte);
// to get here, we know the l2 page has a shadow
gpfn = pa >> PAGE_SHIFT;
- spfn = frame_table[gpfn].shadow_and_flags & PSH_pfn_mask;
+ spfn = __shadow_status(current, gpfn) & PSH_pfn_mask;
- sp2le = (l2_pgentry_t *) map_domain_mem( spfn << PAGE_SHIFT );
- // no real need for a cache here
spte = 0;
- s_sh = frame_table[gpte >> PAGE_SHIFT].shadow_and_flags;
+ if( gpte & _PAGE_PRESENT )
+ s_sh = __shadow_status(current, gpte >> PAGE_SHIFT);
+
+ sp2le = (l2_pgentry_t *) map_domain_mem( spfn << PAGE_SHIFT );
+ // no real need for a cache here
if ( s_sh ) // PSH_shadowed
{
#define FAIL(_f, _a...) \
{printk("XXX %s-FAIL (%d,%d)" _f " g=%08lx s=%08lx\n", sh_check_name, level, i, ## _a , gpte, spte ); BUG();}
-int check_pte( unsigned long gpte, unsigned long spte, int level, int i )
+static int check_pte( struct task_struct *p,
+ unsigned long gpte, unsigned long spte, int level, int i )
{
unsigned long mask, gpfn, spfn;
if ( level > 1 )
FAIL("Linear map ???"); // XXX this will fail on BSD
-#if 0 // might be a RO mapping of a page table page
- if ( frame_table[gpfn].shadow_and_flags != 0 )
- {
- FAIL("Should have been shadowed g.sf=%08lx s.sf=%08lx",
- frame_table[gpfn].shadow_and_flags,
- frame_table[spfn].shadow_and_flags);
- }
- else
-#endif
- return 1;
+ return 1;
}
else
{
if ( level < 2 )
FAIL("Shadow in L1 entry?");
- if ( frame_table[gpfn].shadow_and_flags != (PSH_shadowed | spfn) )
- FAIL("spfn problem g.sf=%08lx s.sf=%08lx [g.sf]=%08lx [s.sf]=%08lx",
- frame_table[gpfn].shadow_and_flags,
- frame_table[spfn].shadow_and_flags,
- frame_table[frame_table[gpfn].shadow_and_flags&PSH_pfn_mask].shadow_and_flags,
- frame_table[frame_table[spfn].shadow_and_flags&PSH_pfn_mask].shadow_and_flags
- );
-
- if ( frame_table[spfn].shadow_and_flags != (PSH_shadow | gpfn) )
- FAIL("gpfn problem g.sf=%08lx s.sf=%08lx",
- frame_table[gpfn].shadow_and_flags,
- frame_table[spfn].shadow_and_flags);
-
+ if ( __shadow_status(p, gpfn) != (PSH_shadowed | spfn) )
+ FAIL("spfn problem g.sf=%08lx",
+ __shadow_status(p, gpfn) );
}
return 1;
}
-int check_l1_table( unsigned long va, unsigned long g2, unsigned long s2 )
+static int check_l1_table( struct task_struct *p, unsigned long va,
+ unsigned long g2, unsigned long s2 )
{
int j;
unsigned long *gpl1e, *spl1e;
unsigned long gpte = gpl1e[j];
unsigned long spte = spl1e[j];
- check_pte( gpte, spte, 1, j );
+ check_pte( p, gpte, spte, 1, j );
}
unmap_domain_mem( spl1e );
#define FAILPT(_f, _a...) \
{printk("XXX FAIL %s-PT" _f "\n", s, ## _a ); BUG();}
-int check_pagetable( pagetable_t pt, char *s )
+int check_pagetable( struct task_struct *p, pagetable_t pt, char *s )
{
unsigned long gptbase = pagetable_val(pt);
unsigned long gpfn, spfn;
sh_check_name = s;
- MEM_VVLOG("%s-PT Audit",s);
+ SH_VVLOG("%s-PT Audit",s);
sh_l2_present = sh_l1_present = 0;
gpfn = gptbase >> PAGE_SHIFT;
- if ( ! (frame_table[gpfn].shadow_and_flags & PSH_shadowed) )
+ if ( ! (__shadow_status(p, gpfn) & PSH_shadowed) )
{
printk("%s-PT %08lx not shadowed\n", s, gptbase);
- if( frame_table[gpfn].shadow_and_flags != 0 ) BUG();
+ if( __shadow_status(p, gpfn) != 0 ) BUG();
return 0;
}
- spfn = frame_table[gpfn].shadow_and_flags & PSH_pfn_mask;
+ spfn = __shadow_status(p, gpfn) & PSH_pfn_mask;
- if ( ! frame_table[gpfn].shadow_and_flags == (PSH_shadowed | spfn) )
+ if ( ! __shadow_status(p, gpfn) == (PSH_shadowed | spfn) )
FAILPT("ptbase shadow inconsistent1");
- if ( ! frame_table[spfn].shadow_and_flags == (PSH_shadow | gpfn) )
- FAILPT("ptbase shadow inconsistent2");
-
gpl2e = (l2_pgentry_t *) map_domain_mem( gpfn << PAGE_SHIFT );
spl2e = (l2_pgentry_t *) map_domain_mem( spfn << PAGE_SHIFT );
unsigned long gpte = l2_pgentry_val(gpl2e[i]);
unsigned long spte = l2_pgentry_val(spl2e[i]);
- check_pte( gpte, spte, 2, i );
+ check_pte( p, gpte, spte, 2, i );
}
unsigned long spte = l2_pgentry_val(spl2e[i]);
if ( spte )
- check_l1_table(
+ check_l1_table( p,
i<<L2_PAGETABLE_SHIFT,
gpte>>PAGE_SHIFT, spte>>PAGE_SHIFT );
unmap_domain_mem( spl2e );
unmap_domain_mem( gpl2e );
- MEM_VVLOG("PT verified : l2_present = %d, l1_present = %d\n",
+ SH_VVLOG("PT verified : l2_present = %d, l1_present = %d\n",
sh_l2_present, sh_l1_present );
return 1;
#endif
-#endif // CONFIG_SHADOW
#ifndef _XENO_SHADOW_H
#define _XENO_SHADOW_H
-#ifdef CONFIG_SHADOW
-
#include <xeno/config.h>
#include <xeno/types.h>
#include <xeno/mm.h>
+#include <xeno/perfc.h>
/* Shadow PT flag bits in pfn_info */
#define PSH_shadowed (1<<31) /* page has a shadow. PFN points to shadow */
-#define PSH_shadow (1<<30) /* page is a shadow. PFN points to orig page */
#define PSH_pending (1<<29) /* page is in the process of being shadowed */
#define PSH_pfn_mask ((1<<21)-1)
#define shadow_linear_pg_table ((l1_pgentry_t *)SH_LINEAR_PT_VIRT_START)
#define shadow_linear_l2_table ((l2_pgentry_t *)(SH_LINEAR_PT_VIRT_START+(SH_LINEAR_PT_VIRT_START>>(L2_PAGETABLE_SHIFT-L1_PAGETABLE_SHIFT))))
-extern pagetable_t shadow_mk_pagetable( unsigned long gptbase, unsigned int shadowmode );
-extern void unshadow_table( unsigned long gpfn, unsigned int type );
-extern unsigned long shadow_l2_table( unsigned long gpfn );
+extern pagetable_t shadow_mk_pagetable( struct task_struct *p,
+ unsigned long gptbase);
extern int shadow_fault( unsigned long va, long error_code );
extern void shadow_l1_normal_pt_update( unsigned long pa, unsigned long gpte,
unsigned long *prev_spfn_ptr,
l1_pgentry_t **prev_spl1e_ptr );
extern void shadow_l2_normal_pt_update( unsigned long pa, unsigned long gpte );
-
+extern void unshadow_table( unsigned long gpfn, unsigned int type );
+extern int shadow_mode_enable( struct task_struct *p, unsigned int mode );
#define SHADOW_DEBUG 0
+#define SHADOW_HASH_DEBUG 0
#define SHADOW_OPTIMISE 1
-#endif // end of CONFIG_SHADOW
+struct shadow_status {
+ unsigned long pfn; // gpfn
+ unsigned long spfn_and_flags; // spfn plus flags
+ struct shadow_status *next; // use pull-to-front list.
+};
+
+#define shadow_ht_extra_size 128 /*128*/
+#define shadow_ht_buckets 256 /*256*/
+
+#ifndef NDEBUG
+#define SH_LOG(_f, _a...) \
+ printk("DOM%llu: (file=shadow.c, line=%d) " _f "\n", \
+ current->domain , __LINE__ , ## _a )
+#else
+#define SH_LOG(_f, _a...)
+#endif
#if SHADOW_DEBUG
-extern int check_pagetable( pagetable_t pt, char *s );
+#define SH_VLOG(_f, _a...) \
+ printk("DOM%llu: (file=shadow.c, line=%d) " _f "\n", \
+ current->domain , __LINE__ , ## _a )
+#else
+#define SH_VLOG(_f, _a...)
+#endif
+
+#if 0
+#define SH_VVLOG(_f, _a...) \
+ printk("DOM%llu: (file=shadow.c, line=%d) " _f "\n", \
+ current->domain , __LINE__ , ## _a )
+#else
+#define SH_VVLOG(_f, _a...)
+#endif
+
+
+
+#if SHADOW_HASH_DEBUG
+static void shadow_audit(struct task_struct *p, int print)
+{
+ int live=0, free=0, j=0, abs;
+ struct shadow_status *a;
+
+ for(j=0;j<shadow_ht_buckets;j++)
+ {
+ a = &p->mm.shadow_ht[j];
+ if(a->pfn) live++;
+ while(a->next && live<9999)
+ {
+ live++;
+ if(a->pfn == 0)
+ {
+ printk("XXX live=%d pfn=%08lx sp=%08lx next=%p\n",
+ live, a->pfn, a->spfn_and_flags, a->next);
+ BUG();
+ }
+ a=a->next;
+ }
+ ASSERT(live<9999);
+ }
+
+ a = p->mm.shadow_ht_free;
+ while(a) { free++; a=a->next; }
+
+ if(print) printk("live=%d free=%d\n",live,free);
+
+ abs=(perfc_value(shadow_l1_pages)+perfc_value(shadow_l2_pages))-live;
+ if( abs < -1 || abs > 1 )
+ {
+ printk("live=%d free=%d l1=%d l2=%d\n",live,free,
+ perfc_value(shadow_l1_pages), perfc_value(shadow_l2_pages) );
+ BUG();
+ }
+
+}
+
#else
-#define check_pagetable( pt, s )
+#define shadow_audit(p, print)
+#endif
+
+static inline struct shadow_status* hash_bucket( struct task_struct *p,
+ unsigned int gpfn )
+{
+ return &(p->mm.shadow_ht[gpfn % shadow_ht_buckets]);
+}
+
+
+static inline unsigned long __shadow_status( struct task_struct *p,
+ unsigned int gpfn )
+{
+ struct shadow_status **ob, *b, *B = hash_bucket( p, gpfn );
+
+ b = B;
+ ob = NULL;
+
+ SH_VVLOG("lookup gpfn=%08lx bucket=%08lx", gpfn, b );
+ shadow_audit(p,0); // if in debug mode
+
+ do
+ {
+ if ( b->pfn == gpfn )
+ {
+ unsigned long t;
+ struct shadow_status *x;
+
+ // swap with head
+ t=B->pfn; B->pfn=b->pfn; b->pfn=t;
+ t=B->spfn_and_flags; B->spfn_and_flags=b->spfn_and_flags;
+ b->spfn_and_flags=t;
+
+ if(ob)
+ { // pull to front
+ *ob=b->next;
+ x=B->next;
+ B->next=b;
+ b->next=x;
+ }
+ return B->spfn_and_flags;
+ }
+#if SHADOW_HASH_DEBUG
+ else
+ {
+ if(b!=B)ASSERT(b->pfn);
+ }
#endif
+ ob=&b->next;
+ b=b->next;
+ }
+ while (b);
+
+ return 0;
+}
+
+/* we can make this locking more fine grained e.g. per shadow page if it
+ever becomes a problem, but since we need a spin lock on the hash table
+anyway its probably not worth being too clever. */
+
+static inline unsigned long get_shadow_status( struct task_struct *p,
+ unsigned int gpfn )
+{
+ unsigned long res;
+
+ spin_lock(&p->mm.shadow_lock);
+ res = __shadow_status( p, gpfn );
+ if (!res) spin_unlock(&p->mm.shadow_lock);
+ return res;
+}
+
+
+static inline void put_shadow_status( struct task_struct *p )
+{
+ spin_unlock(&p->mm.shadow_lock);
+}
+static inline void delete_shadow_status( struct task_struct *p,
+ unsigned int gpfn )
+{
+ struct shadow_status *b, *B, **ob;
+
+ B = b = hash_bucket( p, gpfn );
+
+ SH_VVLOG("delete gpfn=%08x bucket=%p", gpfn, b );
+ shadow_audit(p,0);
+ ASSERT(gpfn);
+
+ if( b->pfn == gpfn )
+ {
+ if (b->next)
+ {
+ struct shadow_status *D=b->next;
+ b->spfn_and_flags = b->next->spfn_and_flags;
+ b->pfn = b->next->pfn;
+
+ b->next = b->next->next;
+ D->next = p->mm.shadow_ht_free;
+ p->mm.shadow_ht_free = D;
+ }
+ else
+ {
+ b->pfn = 0;
+ b->spfn_and_flags = 0;
+ }
+
+#if SHADOW_HASH_DEBUG
+ if( __shadow_status(p,gpfn) ) BUG();
+#endif
+ return;
+ }
+
+ ob = &b->next;
+ b=b->next;
+
+ do
+ {
+ if ( b->pfn == gpfn )
+ {
+ b->pfn = 0;
+ b->spfn_and_flags = 0;
+
+ // b is in the list
+ *ob=b->next;
+ b->next = p->mm.shadow_ht_free;
+ p->mm.shadow_ht_free = b;
+
+#if SHADOW_HASH_DEBUG
+ if( __shadow_status(p,gpfn) ) BUG();
+#endif
+ return;
+ }
+
+ ob = &b->next;
+ b=b->next;
+ }
+ while (b);
+
+ // if we got here, it wasn't in the list
+ BUG();
+}
+
+
+static inline void set_shadow_status( struct task_struct *p,
+ unsigned int gpfn, unsigned long s )
+{
+ struct shadow_status *b, *B, *extra, **fptr;
+ int i;
+
+ B = b = hash_bucket( p, gpfn );
+
+ ASSERT(gpfn);
+ ASSERT(s);
+ SH_VVLOG("set gpfn=%08x s=%08lx bucket=%p(%p)", gpfn, s, b, b->next );
+ shadow_audit(p,0);
+
+ do
+ {
+ if ( b->pfn == gpfn )
+ {
+ b->spfn_and_flags = s;
+ return;
+ }
+
+ b=b->next;
+ }
+ while (b);
+
+ // if we got here, this is an insert rather than update
+
+ ASSERT( s ); // deletes must have succeeded by here
+
+ if ( B->pfn == 0 )
+ {
+ // we can use this head
+ ASSERT( B->next == 0 );
+ B->pfn = gpfn;
+ B->spfn_and_flags = s;
+ return;
+ }
+
+ if( unlikely(p->mm.shadow_ht_free == NULL) )
+ {
+ SH_LOG("allocate more shadow hashtable blocks");
+
+ // we need to allocate more space
+ extra = kmalloc( sizeof(void*) + (shadow_ht_extra_size *
+ sizeof(struct shadow_status)), GFP_KERNEL );
+
+ if( ! extra ) BUG(); // should be more graceful here....
+
+ memset( extra, 0, sizeof(void*) + (shadow_ht_extra_size *
+ sizeof(struct shadow_status)) );
+
+ // add extras to free list
+ fptr = &p->mm.shadow_ht_free;
+ for ( i=0; i<shadow_ht_extra_size; i++ )
+ {
+ *fptr = &extra[i];
+ fptr = &(extra[i].next);
+ }
+ *fptr = NULL;
+
+ *((struct shadow_status ** ) &p->mm.shadow_ht[shadow_ht_extra_size]) =
+ p->mm.shadow_ht_extras;
+ p->mm.shadow_ht_extras = extra;
+
+ }
+
+ // should really put this in B to go right to front
+ b = p->mm.shadow_ht_free;
+ p->mm.shadow_ht_free = b->next;
+ b->spfn_and_flags = s;
+ b->pfn = gpfn;
+ b->next = B->next;
+ B->next = b;
+
+ return;
+}
+
+
+
+#if SHADOW_DEBUG
+extern int check_pagetable( struct task_struct *p, pagetable_t pt, char *s );
+#else
+#define check_pagetable( p, pt, s )
+#endif
#endif